# -*- coding: utf-8 -*-
"""
Created on Fri Dec 04 10:46:16 2015

@author: liliangrong
"""

import urllib
import re
#url = "http://python.jobbole.com/category/guide/page/5/"
#p1 = urllib.urlretrieve(url,'python_study.html')
#print p1
#print dir(p1)

#resp2 = urllib2.urlopen("python_study.html")
#print resp2.read()

resp = urllib.urlopen("file:///D:/python/python_study.html")
#print resp1.read()
page_content = resp.read()

pattern = re.compile('<a target="_blank" class="archive-title" href="(.*?)".*?>(.*?)</a>',re.S)

items = re.findall(pattern,page_content)

for page_url,page_title in items:
    print page_title," : ",page_url

# -*- coding: utf-8 -*-
# """
# Created on Fri Dec 04 10:30:07 2015
#
# @author: liliangrong
# """
#
# def get(url)
# import urllib
# import re
#
# pn = range(1,10)    # 9 paginated pages
#
# url = "http://python.jobbole.com/category/guide/page/2/"
# resp = urllib.urlopen(url)
# p1 = urllib.urlretrieve(url)
# print p1
# f = open('python_study.html','w')
# f.write(p1)
# f.close()
#
# pattern = re.compile('<a target="_blank" class="archive-title" href="(.*?)".*?>(.*?)</a>',re.S)
# page = resp.read()
# #print page
#
# url_and_title = re.findall(pattern,page)
#
# print url_and_title
#
# for item in url_and_title:
#     print item[1]," : ",item[0]
